knitr::opts_chunk$set(warning = FALSE, message = FALSE) 
library(tidyverse)
library(Hmisc)
library(emmeans)
library(car)
library(gridExtra)
options(width = 100)

This analysis is modified from a part of my individual assignment in Business Statistic Module, during the Business Analytics course at Warwick Business School 2023-2024.

All rights to this model/code are exclusively reserved by the author (myself and WBS). This work is published solely for portfolio showcase purposes and is not intended for distribution, reproduction, or any commercial use. Any form of copying, reproduction, or redistribution—whether partial or in whole—without explicit prior consent from the author is strictly prohibited. Unauthorized use will be considered a breach of copyright.


Data Dictionary

The data is providing information of number of bike hire from Satander Cycle Hire Scheme during 2010 to 2023 by date. The bike hiring period cover the period that COVID respond policies were applied. The variables are described in the table below.

Variable Description
Date The date of record
Hires Number of bike hire of the Santander Cycle Hire Scheme
schools_closed Whether the school closures policy (complete closures) apply or not (0 not apply, 1 apply)
pubs_closed Whether the pub closures policy (exclude food serving pubs) apply or not (0 not apply, 1 apply)
shops_closed Whether the shop closures policy (for non-essential shops) apply or not (0 not apply, 1 apply)
eating_places_closed Whether the eating places closures policy (include food serving pubs) apply or not (0 not apply, 1 apply)
stay_at_home Whether the stay at home order apply or not (0 not apply, 1 apply)
household_mixing_indoors_banned Whether the household mixing indoors banned policy apply or not (0 not apply, 1 apply)
wfh Whether the working from home encouraged policy apply or not (0 not apply, 1 apply)
rule_of_6_indoors Whether the rule of 6 indoors policy apply or not (0 not apply, 1 apply)
curfew Whether the 10pm curfew on hospitality apply or not (0 not apply, 1 apply)
eat_out_to_help_out Whether the Eat Out to Help Out scheme apply or not (0 not apply, 1 apply)
day day of the week of the record
month month of the record
year year of the record

Read Data

# Read datafile and assign to 'bike' variable
bike <- read_csv('London_COVID_bikes.csv')

Data Integrity Check and Data Cleaning

# Check data integrity
str(bike) #Check data type
## spc_tbl_ [4,812 × 15] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ date                           : Date[1:4812], format: "2010-07-30" "2010-07-31" "2010-08-01" ...
##  $ Hires                          : num [1:4812] 6897 5564 4303 6642 7966 ...
##  $ schools_closed                 : num [1:4812] 0 0 0 0 0 0 0 0 0 0 ...
##  $ pubs_closed                    : num [1:4812] 0 0 0 0 0 0 0 0 0 0 ...
##  $ shops_closed                   : num [1:4812] 0 0 0 0 0 0 0 0 0 0 ...
##  $ eating_places_closed           : num [1:4812] 0 0 0 0 0 0 0 0 0 0 ...
##  $ stay_at_home                   : num [1:4812] 0 0 0 0 0 0 0 0 0 0 ...
##  $ household_mixing_indoors_banned: num [1:4812] 0 0 0 0 0 0 0 0 0 0 ...
##  $ wfh                            : num [1:4812] 0 0 0 0 0 0 0 0 0 0 ...
##  $ rule_of_6_indoors              : num [1:4812] 0 0 0 0 0 0 0 0 0 0 ...
##  $ curfew                         : num [1:4812] 0 0 0 0 0 0 0 0 0 0 ...
##  $ eat_out_to_help_out            : num [1:4812] 0 0 0 0 0 0 0 0 0 0 ...
##  $ day                            : chr [1:4812] "Fri" "Sat" "Sun" "Mon" ...
##  $ month                          : chr [1:4812] "Jul" "Jul" "Aug" "Aug" ...
##  $ year                           : num [1:4812] 2010 2010 2010 2010 2010 2010 2010 2010 2010 2010 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   date = col_date(format = ""),
##   ..   Hires = col_double(),
##   ..   schools_closed = col_double(),
##   ..   pubs_closed = col_double(),
##   ..   shops_closed = col_double(),
##   ..   eating_places_closed = col_double(),
##   ..   stay_at_home = col_double(),
##   ..   household_mixing_indoors_banned = col_double(),
##   ..   wfh = col_double(),
##   ..   rule_of_6_indoors = col_double(),
##   ..   curfew = col_double(),
##   ..   eat_out_to_help_out = col_double(),
##   ..   day = col_character(),
##   ..   month = col_character(),
##   ..   year = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>
# Change data type of interested variables to factor (wfh, rule_of_6_indoors, eat_out_to_help_out, day, month and year)
bike <- bike %>% mutate(day=factor(day, levels=c("Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat")))
bike <- bike %>% mutate(month=factor(month, levels=c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")))
bike <- bike %>% mutate(year=factor(year, levels=2010:2023))
bike <- bike %>% mutate(wfh=factor(wfh, levels=c(0,1), labels=c("Work-on-site", "Work from home")))
bike <- bike %>% mutate(rule_of_6_indoors=factor(rule_of_6_indoors, levels=c(0,1), labels=c("No rule of 6 indoors", "Rule of 6 indoors")))
bike <- bike %>% mutate(eat_out_to_help_out=factor(eat_out_to_help_out, levels=c(0,1), labels=c("No eat out policy", "Eat out to help out")))


str(bike) #Check data type again
## tibble [4,812 × 15] (S3: tbl_df/tbl/data.frame)
##  $ date                           : Date[1:4812], format: "2010-07-30" "2010-07-31" "2010-08-01" ...
##  $ Hires                          : num [1:4812] 6897 5564 4303 6642 7966 ...
##  $ schools_closed                 : num [1:4812] 0 0 0 0 0 0 0 0 0 0 ...
##  $ pubs_closed                    : num [1:4812] 0 0 0 0 0 0 0 0 0 0 ...
##  $ shops_closed                   : num [1:4812] 0 0 0 0 0 0 0 0 0 0 ...
##  $ eating_places_closed           : num [1:4812] 0 0 0 0 0 0 0 0 0 0 ...
##  $ stay_at_home                   : num [1:4812] 0 0 0 0 0 0 0 0 0 0 ...
##  $ household_mixing_indoors_banned: num [1:4812] 0 0 0 0 0 0 0 0 0 0 ...
##  $ wfh                            : Factor w/ 2 levels "Work-on-site",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ rule_of_6_indoors              : Factor w/ 2 levels "No rule of 6 indoors",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ curfew                         : num [1:4812] 0 0 0 0 0 0 0 0 0 0 ...
##  $ eat_out_to_help_out            : Factor w/ 2 levels "No eat out policy",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ day                            : Factor w/ 7 levels "Sun","Mon","Tue",..: 6 7 1 2 3 4 5 6 7 1 ...
##  $ month                          : Factor w/ 12 levels "Jan","Feb","Mar",..: 7 7 8 8 8 8 8 8 8 8 ...
##  $ year                           : Factor w/ 14 levels "2010","2011",..: 1 1 1 1 1 1 1 1 1 1 ...
# Use summary to observe the statistical information of the interested data
summary(bike)
##       date                Hires       schools_closed     pubs_closed       shops_closed    
##  Min.   :2010-07-30   Min.   :    0   Min.   :0.00000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:2013-11-13   1st Qu.:19776   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :2017-02-28   Median :26356   Median :0.00000   Median :0.00000   Median :0.00000  
##  Mean   :2017-02-28   Mean   :26607   Mean   :0.02743   Mean   :0.05175   Mean   :0.04634  
##  3rd Qu.:2020-06-15   3rd Qu.:33481   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000  
##  Max.   :2023-09-30   Max.   :73094   Max.   :1.00000   Max.   :1.00000   Max.   :1.00000  
##                                                                                            
##  eating_places_closed  stay_at_home     household_mixing_indoors_banned             wfh      
##  Min.   :0.00000      Min.   :0.00000   Min.   :0.00000                 Work-on-site  :3718  
##  1st Qu.:0.00000      1st Qu.:0.00000   1st Qu.:0.00000                 Work from home:1094  
##  Median :0.00000      Median :0.00000   Median :0.00000                                      
##  Mean   :0.05175      Mean   :0.03616   Mean   :0.06525                                      
##  3rd Qu.:0.00000      3rd Qu.:0.00000   3rd Qu.:0.00000                                      
##  Max.   :1.00000      Max.   :1.00000   Max.   :1.00000                                      
##                                                                                              
##             rule_of_6_indoors     curfew                 eat_out_to_help_out  day     
##  No rule of 6 indoors:4716    Min.   :0.00000   No eat out policy  :4784     Sun:687  
##  Rule of 6 indoors   :  96    1st Qu.:0.00000   Eat out to help out:  28     Mon:688  
##                               Median :0.00000                                Tue:687  
##                               Mean   :0.01164                                Wed:687  
##                               3rd Qu.:0.00000                                Thu:687  
##                               Max.   :1.00000                                Fri:688  
##                                                                              Sat:688  
##      month           year     
##  Aug    : 434   2012   : 366  
##  Sep    : 420   2016   : 366  
##  Jul    : 405   2020   : 366  
##  Dec    : 404   2021   : 366  
##  Jan    : 403   2011   : 365  
##  Mar    : 403   2013   : 365  
##  (Other):2343   (Other):2618
# Found incorrect record in year 2021 which is not leap year but have 366 records (probably duplicated record)
summary(bike$year)
## 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 
##  155  365  366  365  365  365  366  365  365  365  366  366  365  273
# Get duplicated records
bike %>% filter(date == bike[duplicated(bike$date),'date']) # Two duplicated records were from December 13, 2021 which have differ in Work from home variable (one record have 0 and another have 1) which we decide to remove record with 0 due to the policy was effective on December 13, 2021 (https://www.gov.uk/government/news/prime-minister-confirms-move-to-plan-b-in-england)
## # A tibble: 2 × 15
##   date       Hires schools_closed pubs_closed shops_closed eating_places_closed stay_at_home
##   <date>     <dbl>          <dbl>       <dbl>        <dbl>                <dbl>        <dbl>
## 1 2021-12-13 25050              0           0            0                    0            0
## 2 2021-12-13 25050              0           0            0                    0            0
## # ℹ 8 more variables: household_mixing_indoors_banned <dbl>, wfh <fct>, rule_of_6_indoors <fct>,
## #   curfew <dbl>, eat_out_to_help_out <fct>, day <fct>, month <fct>, year <fct>
which(grepl('2021-12-13', bike$date)) # Get row number of duplicated records, which are 4155 and 4156
## [1] 4155 4156
bike <- bike[-4155,] # Remove specific duplicated records 
# We also found that the minimum of Hires is zero which is abnormal because we focus on analyse the data of bike hire services when the service is available. Found 2 records in Hires which have 0 bike hire, on 2022-09-10 and 2022-09-11 because it was not open on that weekend (https://londonist.com/london/transport/no-santander-cycle-hire-on-weekend-of-10-11-september), since we want to analyse the effect of the three COVID policies on bike hire, the data should only come from the date that bike rental is open for service. Thus, we should remove these 2 records.

bike %>% filter(Hires == 0)
## # A tibble: 2 × 15
##   date       Hires schools_closed pubs_closed shops_closed eating_places_closed stay_at_home
##   <date>     <dbl>          <dbl>       <dbl>        <dbl>                <dbl>        <dbl>
## 1 2022-09-10     0              0           0            0                    0            0
## 2 2022-09-11     0              0           0            0                    0            0
## # ℹ 8 more variables: household_mixing_indoors_banned <dbl>, wfh <fct>, rule_of_6_indoors <fct>,
## #   curfew <dbl>, eat_out_to_help_out <fct>, day <fct>, month <fct>, year <fct>
bike <- bike %>% filter(!Hires == 0)

Plot the Data

Plot distribution of the data

# Plot data on Hires

color_1 <- c("#67000d", "#a50f15","#cb181d","#ef3b2c","#fb6a4a","#fc9272", "#fcbba1","#fee0d2","#ece7f2","#d0d1e6",  "#74a9cf","#3690c0", "#0570b0", "#023858")

ggplot(data=bike)+
  geom_histogram(aes(x=Hires, y=..density.. ,fill = year),position = "dodge", binwidth = 3000, alpha=0.6)+
  labs(x="Number of Bike hires per year", y="Density", title="Histrogram of bike hires with density curve and normal distribution curve")+
  scale_fill_manual(values = color_1, name="Year")+
  geom_vline(xintercept = mean(bike$Hires), colour="black" )+
  geom_density(aes(x=Hires, y=..density..), colour="black" ,alpha=0.5)+
  stat_function(fun=function(x) {dnorm(x, mean=mean(bike$Hires), sd=sd(bike$Hires))}, col="red", alpha=1)+
  scale_x_continuous(labels = scales::comma)+
  scale_y_continuous(labels = scales::comma)+
  theme(legend.position="bottom")
Figure 1.1 Histrogram of bike hires with density curve and normal distribution curve

Figure 1.1 Histrogram of bike hires with density curve and normal distribution curve

The bike hires data is normally distributed around mean (approx. 26,500). The distribution in the years before COVID (red bar) are positive skewed, while the years after COVID (blue bar) are slightly negative skewed, but in the overall distribution is normal distributed.

The histrogram show extreme outlier above 60,000 bikes per day, but these outlier does not impact to the distribution of the whole data much, moreover, it is not clear that it occurred from data collection process, so, we will not removing these outlier.

Plot the distribution of three policies across years

# Plot the distribution of three policies across year
alpha_1 <- ifelse(bike$wfh=="Work from home", 0.2, 0.1)
alpha_2 <- ifelse(bike$rule_of_6_indoors=="Rule of 6 indoors", 0.3, 0.1)
alpha_3 <- ifelse(bike$eat_out_to_help_out=="Eat out to help out", 0.3, 0.1)
color_2 <- c("#737373", "#0570b0")

grid.arrange(
ggplot(data=bike, aes(y=Hires, x=year, col=wfh))+
  labs(x="Year", y="Number of Bike hires per day", title="The distribution of number of bike hire from 2010 through 2023 with work from home policy", color="Policy")+
  scale_colour_manual(values = color_2)+
  geom_jitter(position = position_jitter(0.4), alpha=alpha_1)+
  geom_violin(color="black",  alpha=0.005)+
  stat_summary(show.legend = FALSE,fun = "mean", geom = "point", color = "black", aes(shape="mean"))+
  scale_y_continuous(labels = scales::comma),

ggplot(data=bike, aes(y=Hires, x=year, col=rule_of_6_indoors))+
  labs(x="Year", y="Number of Bike hires per day", title="The distribution of number of bike hire from 2010 through 2023 with rule of 6 indoor policy", color="Policy")+
  scale_colour_manual(values = color_2)+
  geom_jitter(position = position_jitter(0.4), alpha=alpha_2)+
  geom_violin(color="black",  alpha=0.005)+
  stat_summary(show.legend = FALSE,fun = "mean", geom = "point", color = "black", aes(shape="mean"))+
  scale_y_continuous(labels = scales::comma),

ggplot(data=bike, aes(y=Hires, x=year, col=eat_out_to_help_out))+
  labs(x="Year", y="Number of Bike hires per day", title="The distribution of number of bike hire from 2010 through 2023 with eat out to help out policy", color="Policy")+
  scale_colour_manual(values = color_2)+
  geom_jitter(position = position_jitter(0.4), alpha=alpha_3)+
  geom_violin(color="black",  alpha=0.005)+
  stat_summary(show.legend = FALSE,fun = "mean", geom = "point", color = "black", aes(shape="mean"))+
  scale_y_continuous(labels = scales::comma)

,nrow=3)
Figure 1.2 The distribution of number of bike hire from 2010 through 2023 categorise by policies

Figure 1.2 The distribution of number of bike hire from 2010 through 2023 categorise by policies

Jitter plot of three policies divided by year show the proportion of date that the policy apply within each year. the overall of bike rental have been increasing since 2010 and stay steady between 2014 to 2019, and continue increase after 2019, then dramatically drop in first 9 months of 2023.

The plot show that working from home policy have been introduced in 2020, the policy continue effective and more popular in 2022 and 2023. During 2020 and 2021, the plot show that when working from home was not forced, the number of bike hires seems to be higher than the average, but the highest bike hire date still when working from home was encouraged.

While rule of 6 indoors banned policy and Eat out to help out policy had been effective for a shorter period and not going to continue after 2021 and 2020 respectively. On the other hand, the plots show that when rule of 6 indoors banned policy and Eat out to help out policy were forced, the number of bike rental have been higher than the average.

Plot the distribution of three policies across months

# Plot the distribution of three policies across month

grid.arrange(
ggplot(data=bike, aes(y=Hires, x=month, col=wfh))+
  labs(x="Month", y="Number of Bike hires per day", title="The distribution of number of bike hire across month with work from home policy", color="Policy")+
  scale_colour_manual(values = color_2)+
  geom_jitter(position = position_jitter(0.4), alpha=alpha_1)+
  geom_violin(color="black",  alpha=0.005)+
  stat_summary(show.legend = FALSE,fun = "mean", geom = "point", color = "black", aes(shape="mean"))+
  scale_y_continuous(labels = scales::comma),

ggplot(data=bike, aes(y=Hires, x=month, col=rule_of_6_indoors))+
  labs(x="Month", y="Number of Bike hires per day", title="The distribution of number of bike hire across month with rule of 6 indoor policy", color="Policy")+
  scale_colour_manual(values = color_2)+
  geom_jitter(position = position_jitter(0.4), alpha=alpha_2)+
  geom_violin(color="black",  alpha=0.005)+
  stat_summary(show.legend = FALSE,fun = "mean", geom = "point", color = "black", aes(shape="mean"))+
  scale_y_continuous(labels = scales::comma),

ggplot(data=bike, aes(y=Hires, x=month, col=eat_out_to_help_out))+
  labs(x="Month", y="Number of Bike hires per day", title="The distribution of number of bike hire across month with eat out to help out policy", color="Policy")+
  scale_colour_manual(values = color_2)+
  geom_jitter(position = position_jitter(0.4), alpha=alpha_3)+
  geom_violin(color="black",  alpha=0.005)+
  stat_summary(show.legend = FALSE,fun = "mean", geom = "point", color = "black", aes(shape="mean"))+
  scale_y_continuous(labels = scales::comma)

, nrow=3)
Figure 1.3 The distribution of number of bike hire across months categorise by policies

Figure 1.3 The distribution of number of bike hire across months categorise by policies

Jitter plot of three policies by month shows that the seasonal of bike rental is start from April and at peak in July, before start decreasing.

For each policies, working from home have show some positive effect on March to July (when the bike rental season is going to start) but does not have obvious effect on August to Jan (when the bike rental is continue decreasing). Next, rule of 6 indoors banned policy have applied only for May, June, July, September and October but does not show obviously interaction effect between these months. And the eat out to help out policy only be applied in August and cannot provided any information across month.

Plot the distribution of three policies across days

# Plot the distribution of three policies across day
color_3 <- c("#252525", "#023858")

grid.arrange(
ggplot(data=bike, aes(y=Hires, x=day, col=wfh))+
  labs(x="Day", y="Number of Bike hires per day", title="The distribution of number of bike hire across day with work from home policy", color="Policy")+
  scale_colour_manual(values = color_2)+
  geom_jitter(position = position_jitter(0.4), alpha=alpha_1)+
  geom_violin(color="black",  alpha=0.005)+
  geom_violin(aes(fill=wfh),  alpha=0.5,show.legend = FALSE)+
  scale_fill_manual(values = color_3)+
  stat_summary(show.legend = FALSE,fun = "mean", geom = "point", color = "black", aes(shape="mean"))+
  scale_y_continuous(labels = scales::comma),

ggplot(data=bike, aes(y=Hires, x=day, col=rule_of_6_indoors))+
  labs(x="Day", y="Number of Bike hires per day", title="The distribution of number of bike hire across day with rule of 6 indoor policy", color="Policy")+
  scale_colour_manual(values = color_2)+
  geom_jitter(position = position_jitter(0.4), alpha=alpha_2)+
  geom_violin(color="black",  alpha=0.005)+
  geom_violin(aes(fill=rule_of_6_indoors), alpha=0.5,show.legend = FALSE)+
  scale_fill_manual(values = color_3)+
  stat_summary(show.legend = FALSE,fun = "mean", geom = "point", color = "black", aes(shape="mean"))+
  scale_y_continuous(labels = scales::comma),

ggplot(data=bike, aes(y=Hires, x=day, col=eat_out_to_help_out))+
  labs(x="Day", y="Number of Bike hires per day", title="The distribution of number of bike hire across day with eat out to help out policy", color="Policy")+
  scale_colour_manual(values = color_2)+
  geom_jitter(position = position_jitter(0.4), alpha=alpha_3)+
  geom_violin(color="black", alpha=0.005)+
  geom_violin(aes(fill=eat_out_to_help_out), alpha=0.5,show.legend = FALSE)+
  scale_fill_manual(values = color_3)+
  stat_summary(show.legend = FALSE,fun = "mean", geom = "point", color = "black", aes(shape="mean"))+
  scale_y_continuous(labels = scales::comma)

, nrow=3)
Figure 1.4 The distribution of number of bike hire across days categorise by policies

Figure 1.4 The distribution of number of bike hire across days categorise by policies

For effect of 3 policies across days, we add the violin plot when the policy is on and off to compare the distribution shape of two class in difference day.

The jitter and violin plot show that, in the overall, the mean of bike hires is higher in weekdays than weekends but the distribution of weekends are wider (violin plots are taller) than weekdays.

For work from home, the policy have some effect on Saturday and Sunday which violin plot of work from home in weekends have been moving upward, while violin plot for weekdays is clustered at the same level between working on site and working from home, but the highest hires is higher when working from home is applied.

For indoors banned policy also seem to have some effect on Friday, Saturday and Sunday as well, since the policy apply on these days make the violin shape vertical flip (from violin to upside down bottle), which mean the bike hires tend to increase in Friday, Saturday and Sunday when the policy applied.

On the contrary, the effect of eat out to help out policy is not clearly seen across days of week because the policy only apply for 28 days (only 4 observations per day).

Regression Analysis

Multicollinearity Check

# Check Multicollinearity between 3 policies, days, months, years, and bike hires
# Because 3 policies, days, months and years are categorical data, we will use VIF test to test correlation between 3 policies and days/months/years

# Create regression model to compute VIF score
m.bike.3pols.d.m.y <- lm(Hires ~ wfh + rule_of_6_indoors + eat_out_to_help_out + day + month + year, data = bike)
vif(m.bike.3pols.d.m.y)
##                         GVIF Df GVIF^(1/(2*Df))
## wfh                 5.967099  1        2.442765
## rule_of_6_indoors   1.265146  1        1.124787
## eat_out_to_help_out 1.228996  1        1.108601
## day                 1.000318  6        1.000026
## month               1.246927 11        1.010081
## year                7.331957 13        1.079637
# We found that the VIF of working from home policy and year of record is highly multicollinear with other variable (It is probably because the policy was announced in 2020 and continue apply until 2023, which make it predictable by year. While other policies were apply for short period of time compare to working from home policy).
# Create regression model by removing year from the model and compute VIF score
m.bike.3pols.d.m <- lm(Hires ~ wfh + rule_of_6_indoors + eat_out_to_help_out + day + month, data = bike)
vif(m.bike.3pols.d.m)
##                         GVIF Df GVIF^(1/(2*Df))
## wfh                 1.082615  1        1.040488
## rule_of_6_indoors   1.093566  1        1.045737
## eat_out_to_help_out 1.063576  1        1.031298
## day                 1.000235  6        1.000020
## month               1.122066 11        1.005249
# The VIF (GVIF) of model without show less multicollinearity.

Effect of working from home encouraged policy, rule of 6 indoors banned policy and eat out help out policy on bike hire

# Create regression model predicting bike hires by using three policies as predictor, then compare the adjusted R-square of model with and without interaction between each policy

# 3 policies without interaction

# Create regression model
m.bike.policy <- lm(Hires ~  eat_out_to_help_out + wfh + rule_of_6_indoors  , data = bike)
summary(m.bike.policy)
## 
## Call:
## lm(formula = Hires ~ eat_out_to_help_out + wfh + rule_of_6_indoors, 
##     data = bike)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -27055  -6761   -229   6839  46984 
## 
## Coefficients:
##                                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                             26109.5      157.3 165.944  < 2e-16 ***
## eat_out_to_help_outEat out to help out  10308.9     1812.6   5.687 1.37e-08 ***
## wfhWork from home                        1231.5      338.6   3.637 0.000279 ***
## rule_of_6_indoorsRule of 6 indoors       8492.9     1013.5   8.380  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9555 on 4805 degrees of freedom
## Multiple R-squared:  0.02695,    Adjusted R-squared:  0.02634 
## F-statistic: 44.36 on 3 and 4805 DF,  p-value: < 2.2e-16
# Create regression model to analyse effect of 3 policies with interaction on bike hire
m.bike.policy.iter <- lm(Hires ~  wfh * eat_out_to_help_out * rule_of_6_indoors  , data = bike)
summary(m.bike.policy.iter)
## 
## Call:
## lm(formula = Hires ~ wfh * eat_out_to_help_out * rule_of_6_indoors, 
##     data = bike)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -26325  -6743   -246   6841  47002 
## 
## Coefficients: (3 not defined because of singularities)
##                                                                                             Estimate
## (Intercept)                                                                                  26092.1
## wfhWork from home                                                                             1313.0
## eat_out_to_help_outEat out to help out                                                       10326.4
## rule_of_6_indoorsRule of 6 indoors                                                           16544.4
## wfhWork from home:eat_out_to_help_outEat out to help out                                          NA
## wfhWork from home:rule_of_6_indoorsRule of 6 indoors                                         -8846.0
## eat_out_to_help_outEat out to help out:rule_of_6_indoorsRule of 6 indoors                         NA
## wfhWork from home:eat_out_to_help_outEat out to help out:rule_of_6_indoorsRule of 6 indoors       NA
##                                                                                             Std. Error
## (Intercept)                                                                                      157.4
## wfhWork from home                                                                                340.0
## eat_out_to_help_outEat out to help out                                                          1811.7
## rule_of_6_indoorsRule of 6 indoors                                                              3380.2
## wfhWork from home:eat_out_to_help_outEat out to help out                                            NA
## wfhWork from home:rule_of_6_indoorsRule of 6 indoors                                            3543.0
## eat_out_to_help_outEat out to help out:rule_of_6_indoorsRule of 6 indoors                           NA
## wfhWork from home:eat_out_to_help_outEat out to help out:rule_of_6_indoorsRule of 6 indoors         NA
##                                                                                             t value
## (Intercept)                                                                                 165.759
## wfhWork from home                                                                             3.861
## eat_out_to_help_outEat out to help out                                                        5.700
## rule_of_6_indoorsRule of 6 indoors                                                            4.895
## wfhWork from home:eat_out_to_help_outEat out to help out                                         NA
## wfhWork from home:rule_of_6_indoorsRule of 6 indoors                                         -2.497
## eat_out_to_help_outEat out to help out:rule_of_6_indoorsRule of 6 indoors                        NA
## wfhWork from home:eat_out_to_help_outEat out to help out:rule_of_6_indoorsRule of 6 indoors      NA
##                                                                                             Pr(>|t|)
## (Intercept)                                                                                  < 2e-16
## wfhWork from home                                                                           0.000114
## eat_out_to_help_outEat out to help out                                                      1.27e-08
## rule_of_6_indoorsRule of 6 indoors                                                          1.02e-06
## wfhWork from home:eat_out_to_help_outEat out to help out                                          NA
## wfhWork from home:rule_of_6_indoorsRule of 6 indoors                                        0.012567
## eat_out_to_help_outEat out to help out:rule_of_6_indoorsRule of 6 indoors                         NA
## wfhWork from home:eat_out_to_help_outEat out to help out:rule_of_6_indoorsRule of 6 indoors       NA
##                                                                                                
## (Intercept)                                                                                 ***
## wfhWork from home                                                                           ***
## eat_out_to_help_outEat out to help out                                                      ***
## rule_of_6_indoorsRule of 6 indoors                                                          ***
## wfhWork from home:eat_out_to_help_outEat out to help out                                       
## wfhWork from home:rule_of_6_indoorsRule of 6 indoors                                        *  
## eat_out_to_help_outEat out to help out:rule_of_6_indoorsRule of 6 indoors                      
## wfhWork from home:eat_out_to_help_outEat out to help out:rule_of_6_indoorsRule of 6 indoors    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9550 on 4804 degrees of freedom
## Multiple R-squared:  0.02821,    Adjusted R-squared:  0.0274 
## F-statistic: 34.86 on 4 and 4804 DF,  p-value: < 2.2e-16
# The adjusted R-squared of the model with interaction show better result compare to model without interaction but the model result also show that the eat out to help out campaign have less number of records (28 records) and not have enough power to predict the effect, it shows NA when interact with work from home and rule of 6 indoors banned policies. 
# Create interaction model by remove the interaction between eat out to help out policy to reduce the complexity of the model.

# Create regression model of 3 policies with interaction between work from home and rule of 6 indoors banned policies

m.bike.eat.iter <- lm(Hires ~  eat_out_to_help_out + wfh * rule_of_6_indoors  , data = bike)
# Compare model with and without interaction by using ANOVA to see improvement after add interaction effect
anova(m.bike.policy, m.bike.eat.iter)
## Analysis of Variance Table
## 
## Model 1: Hires ~ eat_out_to_help_out + wfh + rule_of_6_indoors
## Model 2: Hires ~ eat_out_to_help_out + wfh * rule_of_6_indoors
##   Res.Df        RSS Df Sum of Sq      F  Pr(>F)  
## 1   4805 4.3873e+11                              
## 2   4804 4.3816e+11  1 568556120 6.2337 0.01257 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

By adding the interaction effect of work from home and rule of 6 indoors banned policy to the model is significantly improves the fit \(F(1,4804)=6.23, p < 0.013\).

# Using ANOVA and emmeans to see the interaction effect of work from home and rule of 6 indoors policies
anova(m.bike.eat.iter)
## Analysis of Variance Table
## 
## Response: Hires
##                         Df     Sum Sq    Mean Sq F value    Pr(>F)    
## eat_out_to_help_out      1 2.7047e+09 2704702089 29.6545 5.420e-08 ***
## wfh                      1 3.0337e+09 3033726245 33.2620 8.558e-09 ***
## rule_of_6_indoors        1 6.4116e+09 6411596521 70.2972 < 2.2e-16 ***
## wfh:rule_of_6_indoors    1 5.6856e+08  568556120  6.2337   0.01257 *  
## Residuals             4804 4.3816e+11   91207047                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(  m.bike.eat.iter.emm <- emmeans(m.bike.eat.iter, ~eat_out_to_help_out + wfh * rule_of_6_indoors)  )
##  eat_out_to_help_out wfh            rule_of_6_indoors    emmean   SE   df lower.CL upper.CL
##  No eat out policy   Work-on-site   No rule of 6 indoors  26092  157 4804    25783    26401
##  Eat out to help out Work-on-site   No rule of 6 indoors  36418 1805 4804    32880    39957
##  No eat out policy   Work from home No rule of 6 indoors  27405  301 4804    26814    27996
##  Eat out to help out Work from home No rule of 6 indoors  37731 1837 4804    34131    41332
##  No eat out policy   Work-on-site   Rule of 6 indoors     42636 3377 4804    36017    49256
##  Eat out to help out Work-on-site   Rule of 6 indoors     52963 3832 4804    45451    60475
##  No eat out policy   Work from home Rule of 6 indoors     35104 1018 4804    33108    37099
##  Eat out to help out Work from home Rule of 6 indoors     45430 2078 4804    41356    49504
## 
## Confidence level used: 0.95
# Plot prediction to show interaction effect of work from home and rule of 6 indoors
ggplot(summary(m.bike.eat.iter.emm), aes(x=wfh, y=emmean, ymin=lower.CL, ymax=upper.CL, colour=rule_of_6_indoors)) + 
  geom_point() + 
  geom_linerange() + 
  labs(x="Policy apply", y="Predicted Bike Hires", colour="Rule of 6 indoors policy", title="Number of bike hires prediction with the effect of three policies") + 
  facet_grid(.~eat_out_to_help_out) +  
  geom_line()
Figure 1.5 Number of bike hires prediction with the effect of three policies

Figure 1.5 Number of bike hires prediction with the effect of three policies

The chart above show interaction between work from home policy and rule of 6 indoors policy, when rule of 6 policy apply, the work from home policy have negative effect on bike hire, but on the other hand, when there was no rule of 6 indoors, the work from policy have slightly positive on bike hire. But the eat out to help out policy does not show the interaction effect to those 2 policies.

# Using summary and confint to get effect and confident interval of model
summary(m.bike.eat.iter)
## 
## Call:
## lm(formula = Hires ~ eat_out_to_help_out + wfh * rule_of_6_indoors, 
##     data = bike)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -26325  -6743   -246   6841  47002 
## 
## Coefficients:
##                                                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                                           26092.1      157.4 165.759  < 2e-16 ***
## eat_out_to_help_outEat out to help out                10326.4     1811.7   5.700 1.27e-08 ***
## wfhWork from home                                      1313.0      340.0   3.861 0.000114 ***
## rule_of_6_indoorsRule of 6 indoors                    16544.4     3380.2   4.895 1.02e-06 ***
## wfhWork from home:rule_of_6_indoorsRule of 6 indoors  -8846.0     3543.0  -2.497 0.012567 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9550 on 4804 degrees of freedom
## Multiple R-squared:  0.02821,    Adjusted R-squared:  0.0274 
## F-statistic: 34.86 on 4 and 4804 DF,  p-value: < 2.2e-16
cbind(coef(m.bike.eat.iter), confint(m.bike.eat.iter))
##                                                                      2.5 %    97.5 %
## (Intercept)                                          26092.063  25783.4683 26400.658
## eat_out_to_help_outEat out to help out               10326.365   6774.6494 13878.081
## wfhWork from home                                     1312.989    646.3719  1979.607
## rule_of_6_indoorsRule of 6 indoors                   16544.437   9917.7239 23171.149
## wfhWork from home:rule_of_6_indoorsRule of 6 indoors -8845.967 -15791.8960 -1900.038

Without controlling the effect of Year, Month or Day, the effect of eat out to help out, work from home and rule of 6 indoors policies are described as below,

The eat out to help out policy have significant positive effect on bike hires (\(\beta\) = 10326, CI [6775, 13878], \(t\)(4804) = 5.7, \(p\) < 0.0001).

The work from home policy also have significant positive effect on bike hires (\(\beta\) = 1313, CI [646, 1980], \(t\)(4804) = 3.9, \(p\) = 0.00011).

The effect of rule of 6 indoors on bike hires is significant positive (\(\beta\) = 16544, CI [9918, 23171], \(t\)(4804) = 4.9, \(p\) < 0.0001).

Lastly, the interaction effect of work from home to rule of 6 indoors policy is significant negative (\(\beta\) = -8846.0, CI [-15792, -1900], \(t\)(4804) = -2.5, \(p\) = 0.013) and eat out to help out policy does not have prediction power enough to identify interaction effect on work from home and rule of 6 indoors policy.

Effect of Day and Month on bike hires

# Because VIF value show that year is multicollinear with WFH, so we will not use it for controlling variable.

# We start look into effect of day on bike hires

# Adding day as control variable to the regression model we already created above.
m.bike.withday <- lm(Hires ~  eat_out_to_help_out + day + wfh * rule_of_6_indoors, data = bike)
summary(m.bike.withday) # Day variable has significant main effect to the bike hires
## 
## Call:
## lm(formula = Hires ~ eat_out_to_help_out + day + wfh * rule_of_6_indoors, 
##     data = bike)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -24335  -6585   -688   6683  45078 
## 
## Coefficients:
##                                                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                                           22093.3      363.3  60.813  < 2e-16 ***
## eat_out_to_help_outEat out to help out                10323.2     1767.7   5.840 5.57e-09 ***
## dayMon                                                 3653.7      503.0   7.264 4.37e-13 ***
## dayTue                                                 5814.5      503.0  11.560  < 2e-16 ***
## dayWed                                                 5923.4      503.0  11.777  < 2e-16 ***
## dayThu                                                 5922.3      503.0  11.774  < 2e-16 ***
## dayFri                                                 4709.1      502.8   9.366  < 2e-16 ***
## daySat                                                 1990.6      503.0   3.958 7.68e-05 ***
## wfhWork from home                                      1299.5      331.8   3.917 9.10e-05 ***
## rule_of_6_indoorsRule of 6 indoors                    16584.8     3298.5   5.028 5.14e-07 ***
## wfhWork from home:rule_of_6_indoorsRule of 6 indoors  -8948.3     3457.3  -2.588  0.00968 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9319 on 4798 degrees of freedom
## Multiple R-squared:  0.07594,    Adjusted R-squared:  0.07401 
## F-statistic: 39.43 on 10 and 4798 DF,  p-value: < 2.2e-16
# Create another model with interaction effect of day on work from home, 6 indoors and eat out policy
m.bike.withday.wfh.iter <- lm(Hires ~  eat_out_to_help_out + day * wfh + wfh * rule_of_6_indoors, data = bike)
summary(m.bike.withday.wfh.iter) # The interaction effect of day to work from home also significant for almost day, except Saturday.
## 
## Call:
## lm(formula = Hires ~ eat_out_to_help_out + day * wfh + wfh * 
##     rule_of_6_indoors, data = bike)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -24873  -6412   -739   6605  44697 
## 
## Coefficients:
##                                                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                                           21347.8      401.1  53.221  < 2e-16 ***
## eat_out_to_help_outEat out to help out                10322.3     1753.9   5.885 4.24e-09 ***
## dayMon                                                 4902.0      566.9   8.647  < 2e-16 ***
## dayTue                                                 6945.6      567.4  12.241  < 2e-16 ***
## dayWed                                                 7151.4      567.4  12.603  < 2e-16 ***
## dayThu                                                 7049.4      567.4  12.424  < 2e-16 ***
## dayFri                                                 5710.7      567.2  10.069  < 2e-16 ***
## daySat                                                 1479.3      566.9   2.610 0.009095 ** 
## wfhWork from home                                      4616.9      849.9   5.432 5.85e-08 ***
## rule_of_6_indoorsRule of 6 indoors                    16521.2     3272.7   5.048 4.63e-07 ***
## dayMon:wfhWork from home                              -5549.1     1195.0  -4.644 3.51e-06 ***
## dayTue:wfhWork from home                              -5008.3     1192.3  -4.201 2.71e-05 ***
## dayWed:wfhWork from home                              -5432.4     1192.3  -4.556 5.34e-06 ***
## dayThu:wfhWork from home                              -4991.1     1192.3  -4.186 2.89e-05 ***
## dayFri:wfhWork from home                              -4443.1     1192.2  -3.727 0.000196 ***
## daySat:wfhWork from home                               2249.6     1195.0   1.883 0.059820 .  
## wfhWork from home:rule_of_6_indoorsRule of 6 indoors  -8834.0     3430.3  -2.575 0.010046 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9246 on 4792 degrees of freedom
## Multiple R-squared:  0.09149,    Adjusted R-squared:  0.08846 
## F-statistic: 30.16 on 16 and 4792 DF,  p-value: < 2.2e-16
m.bike.withday.wfh.6indoors.iter <- lm(Hires ~  eat_out_to_help_out + day * wfh + day * rule_of_6_indoors + wfh * rule_of_6_indoors, data = bike)
summary(m.bike.withday.wfh.6indoors.iter) # The interaction effect of day to rule of 6 indoors policy is not statistically significant.
## 
## Call:
## lm(formula = Hires ~ eat_out_to_help_out + day * wfh + day * 
##     rule_of_6_indoors + wfh * rule_of_6_indoors, data = bike)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -26046  -6428   -720   6599  44698 
## 
## Coefficients:
##                                                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                                          21344.54     401.02  53.226  < 2e-16 ***
## eat_out_to_help_outEat out to help out               10322.31    1753.36   5.887 4.20e-09 ***
## dayMon                                                4917.93     566.84   8.676  < 2e-16 ***
## dayTue                                                6949.73     567.29  12.251  < 2e-16 ***
## dayWed                                                7151.19     567.29  12.606  < 2e-16 ***
## dayThu                                                7051.04     567.29  12.429  < 2e-16 ***
## dayFri                                                5719.66     567.02  10.087  < 2e-16 ***
## daySat                                                1471.48     566.76   2.596 0.009452 ** 
## wfhWork from home                                     4520.97     870.19   5.195 2.13e-07 ***
## rule_of_6_indoorsRule of 6 indoors                   18243.11    4130.36   4.417 1.02e-05 ***
## dayMon:wfhWork from home                             -5170.48    1226.40  -4.216 2.53e-05 ***
## dayTue:wfhWork from home                             -4835.75    1226.74  -3.942 8.20e-05 ***
## dayWed:wfhWork from home                             -5445.58    1226.74  -4.439 9.24e-06 ***
## dayThu:wfhWork from home                             -4926.49    1226.74  -4.016 6.01e-05 ***
## dayFri:wfhWork from home                             -4062.90    1226.62  -3.312 0.000932 ***
## daySat:wfhWork from home                              1936.11    1228.17   1.576 0.114995    
## dayMon:rule_of_6_indoorsRule of 6 indoors            -5086.99    3698.55  -1.375 0.169071    
## dayTue:rule_of_6_indoorsRule of 6 indoors            -2208.99    3700.49  -0.597 0.550573    
## dayWed:rule_of_6_indoorsRule of 6 indoors               86.76    3700.49   0.023 0.981297    
## dayThu:rule_of_6_indoorsRule of 6 indoors             -874.88    3700.49  -0.236 0.813114    
## dayFri:rule_of_6_indoorsRule of 6 indoors            -4775.34    3700.49  -1.290 0.196953    
## daySat:rule_of_6_indoorsRule of 6 indoors             4158.27    3763.02   1.105 0.269200    
## wfhWork from home:rule_of_6_indoorsRule of 6 indoors -9283.34    3443.72  -2.696 0.007048 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9243 on 4786 degrees of freedom
## Multiple R-squared:  0.09318,    Adjusted R-squared:  0.08901 
## F-statistic: 22.35 on 22 and 4786 DF,  p-value: < 2.2e-16
m.bike.withday.wfh.6indoors.eat.iter <- lm(Hires ~  eat_out_to_help_out + day * wfh + day * rule_of_6_indoors + day * eat_out_to_help_out  + wfh * rule_of_6_indoors, data = bike)
summary(m.bike.withday.wfh.6indoors.eat.iter) # There was no interaction effect between day and eat out policy.
## 
## Call:
## lm(formula = Hires ~ eat_out_to_help_out + day * wfh + day * 
##     rule_of_6_indoors + day * eat_out_to_help_out + wfh * rule_of_6_indoors, 
##     data = bike)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -26050  -6430   -740   6606  44674 
## 
## Coefficients:
##                                                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                                          21300.53     402.40  52.934  < 2e-16 ***
## eat_out_to_help_outEat out to help out               16175.22    4639.85   3.486 0.000494 ***
## dayMon                                                4972.86     569.10   8.738  < 2e-16 ***
## dayTue                                                6999.44     569.56  12.289  < 2e-16 ***
## dayWed                                                7211.87     569.56  12.662  < 2e-16 ***
## dayThu                                                7119.27     569.56  12.500  < 2e-16 ***
## dayFri                                                5787.63     569.29  10.166  < 2e-16 ***
## daySat                                                1478.29     569.02   2.598 0.009407 ** 
## wfhWork from home                                     4564.69     870.97   5.241 1.67e-07 ***
## rule_of_6_indoorsRule of 6 indoors                   18245.60    4131.21   4.417 1.03e-05 ***
## dayMon:wfhWork from home                             -5225.00    1227.62  -4.256 2.12e-05 ***
## dayTue:wfhWork from home                             -4885.13    1227.97  -3.978 7.05e-05 ***
## dayWed:wfhWork from home                             -5505.86    1227.97  -4.484 7.51e-06 ***
## dayThu:wfhWork from home                             -4994.29    1227.97  -4.067 4.84e-05 ***
## dayFri:wfhWork from home                             -4130.43    1227.85  -3.364 0.000774 ***
## daySat:wfhWork from home                              1929.34    1229.40   1.569 0.116636    
## dayMon:rule_of_6_indoorsRule of 6 indoors            -5092.22    3699.32  -1.377 0.168722    
## dayTue:rule_of_6_indoorsRule of 6 indoors            -2213.08    3701.26  -0.598 0.549917    
## dayWed:rule_of_6_indoorsRule of 6 indoors               81.81    3701.26   0.022 0.982366    
## dayThu:rule_of_6_indoorsRule of 6 indoors             -880.41    3701.26  -0.238 0.811993    
## dayFri:rule_of_6_indoorsRule of 6 indoors            -4780.85    3701.26  -1.292 0.196529    
## daySat:rule_of_6_indoorsRule of 6 indoors             4157.70    3763.80   1.105 0.269365    
## eat_out_to_help_outEat out to help out:dayMon        -7303.86    6561.73  -1.113 0.265722    
## eat_out_to_help_outEat out to help out:dayTue        -6606.44    6561.77  -1.007 0.314079    
## eat_out_to_help_outEat out to help out:dayWed        -8060.37    6561.77  -1.228 0.219364    
## eat_out_to_help_outEat out to help out:dayThu        -9061.27    6561.77  -1.381 0.167369    
## eat_out_to_help_outEat out to help out:dayFri        -9032.63    6561.75  -1.377 0.168714    
## eat_out_to_help_outEat out to help out:daySat         -906.04    6561.73  -0.138 0.890183    
## wfhWork from home:rule_of_6_indoorsRule of 6 indoors -9282.08    3444.44  -2.695 0.007068 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9245 on 4780 degrees of freedom
## Multiple R-squared:  0.09394,    Adjusted R-squared:  0.08863 
## F-statistic:  17.7 on 28 and 4780 DF,  p-value: < 2.2e-16
# Using ANOVA to compare the improvement of models when controlling day
anova(m.bike.eat.iter, m.bike.withday, m.bike.withday.wfh.iter, m.bike.withday.wfh.6indoors.iter, m.bike.withday.wfh.6indoors.eat.iter)
## Analysis of Variance Table
## 
## Model 1: Hires ~ eat_out_to_help_out + wfh * rule_of_6_indoors
## Model 2: Hires ~ eat_out_to_help_out + day + wfh * rule_of_6_indoors
## Model 3: Hires ~ eat_out_to_help_out + day * wfh + wfh * rule_of_6_indoors
## Model 4: Hires ~ eat_out_to_help_out + day * wfh + day * rule_of_6_indoors + 
##     wfh * rule_of_6_indoors
## Model 5: Hires ~ eat_out_to_help_out + day * wfh + day * rule_of_6_indoors + 
##     day * eat_out_to_help_out + wfh * rule_of_6_indoors
##   Res.Df        RSS Df  Sum of Sq       F    Pr(>F)    
## 1   4804 4.3816e+11                                    
## 2   4798 4.1664e+11  6 2.1520e+10 41.9656 < 2.2e-16 ***
## 3   4792 4.0963e+11  6 7.0126e+09 13.6755 1.844e-15 ***
## 4   4786 4.0887e+11  6 7.5975e+08  1.4816    0.1801    
## 5   4780 4.0852e+11  6 3.4410e+08  0.6710    0.6731    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

The ANOVA analysis show that, by controlling day, the model is significantly improved the fit \(F(6,4798)=42.0, p < .0001\) and when continue adding the interaction effect of day on work from home policy to the model, it is significantly improve the model fit more \(F(6,4792)=13.7, p < .0001\). But adding more interaction effect of day to rule of 6 indoors and eat out to help out policy are not significantly improve the model fit \(F(6,4786)=1.48, p = 0.18\) and \(F(6,4780)=0.67, p = 0.67\) respectively.

# The effect of month on bike hires

# Adding month as control variable to the regression model we already created above.
m.bike.withmonth <- lm(Hires ~  eat_out_to_help_out + month + wfh * rule_of_6_indoors, data = bike)
summary(m.bike.withmonth) # Month variable itself has significant main effect to the bike hires
## 
## Call:
## lm(formula = Hires ~ eat_out_to_help_out + month + wfh * rule_of_6_indoors, 
##     data = bike)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -29124  -5177    377   5515  38567 
## 
## Coefficients:
##                                                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                                           18380.9      390.7  47.052  < 2e-16 ***
## eat_out_to_help_outEat out to help out                 5357.2     1512.1   3.543 0.000400 ***
## monthFeb                                               1672.2      558.2   2.996 0.002750 ** 
## monthMar                                               4099.3      545.1   7.521 6.46e-14 ***
## monthApr                                               7434.9      549.9  13.520  < 2e-16 ***
## monthMay                                              11837.1      546.0  21.678  < 2e-16 ***
## monthJun                                              14775.9      553.3  26.703  < 2e-16 ***
## monthJul                                              16307.0      545.6  29.889  < 2e-16 ***
## monthAug                                              12680.4      544.4  23.293  < 2e-16 ***
## monthSep                                              11910.2      543.5  21.916  < 2e-16 ***
## monthOct                                               8990.1      546.8  16.441  < 2e-16 ***
## monthNov                                               4727.9      549.9   8.598  < 2e-16 ***
## monthDec                                              -1442.5      545.0  -2.647 0.008157 ** 
## wfhWork from home                                      1385.4      278.2   4.981 6.56e-07 ***
## rule_of_6_indoorsRule of 6 indoors                    12345.4     2761.9   4.470 8.01e-06 ***
## wfhWork from home:rule_of_6_indoorsRule of 6 indoors -10251.3     2894.5  -3.542 0.000402 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7736 on 4793 degrees of freedom
## Multiple R-squared:  0.3639, Adjusted R-squared:  0.3619 
## F-statistic: 182.8 on 15 and 4793 DF,  p-value: < 2.2e-16
# Create another model with interaction effect of day on work from home, 6 indoors and eat out policy
m.bike.withmonth.wfh.iter <- lm(Hires ~  eat_out_to_help_out + month * wfh + wfh * rule_of_6_indoors, data = bike)
summary(m.bike.withmonth.wfh.iter) # The interaction effect of month to work from home is significant for some months
## 
## Call:
## lm(formula = Hires ~ eat_out_to_help_out + month * wfh + wfh * 
##     rule_of_6_indoors, data = bike)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -29298  -5125    522   5594  38232 
## 
## Coefficients:
##                                                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                                           18731.7      437.9  42.778  < 2e-16 ***
## eat_out_to_help_outEat out to help out                 5434.4     1515.1   3.587 0.000338 ***
## monthFeb                                               1061.9      633.9   1.675 0.093933 .  
## monthMar                                               3590.0      627.1   5.725 1.10e-08 ***
## monthApr                                               7273.4      641.8  11.333  < 2e-16 ***
## monthMay                                              11071.7      636.2  17.402  < 2e-16 ***
## monthJun                                              13316.8      641.8  20.750  < 2e-16 ***
## monthJul                                              16130.5      627.6  25.701  < 2e-16 ***
## monthAug                                              12252.3      603.8  20.293  < 2e-16 ***
## monthSep                                              12139.2      604.2  20.092  < 2e-16 ***
## monthOct                                               8943.7      605.0  14.782  < 2e-16 ***
## monthNov                                               4328.2      609.8   7.098 1.45e-12 ***
## monthDec                                              -1594.2      613.5  -2.599 0.009389 ** 
## wfhWork from home                                      -134.6      911.5  -0.148 0.882576    
## rule_of_6_indoorsRule of 6 indoors                    11765.7     2757.4   4.267 2.02e-05 ***
## monthFeb:wfhWork from home                             2653.8     1322.3   2.007 0.044816 *  
## monthMar:wfhWork from home                             2111.5     1258.1   1.678 0.093348 .  
## monthApr:wfhWork from home                              904.8     1243.5   0.728 0.466872    
## monthMay:wfhWork from home                             2913.9     1239.2   2.351 0.018742 *  
## monthJun:wfhWork from home                             5217.6     1264.6   4.126 3.75e-05 ***
## monthJul:wfhWork from home                              946.4     1261.3   0.750 0.453094    
## monthAug:wfhWork from home                             2025.9     1400.9   1.446 0.148182    
## monthSep:wfhWork from home                            -1976.3     1380.8  -1.431 0.152415    
## monthOct:wfhWork from home                             -359.6     1421.3  -0.253 0.800268    
## monthNov:wfhWork from home                             1838.1     1414.8   1.299 0.193942    
## monthDec:wfhWork from home                              529.6     1322.6   0.400 0.688845    
## wfhWork from home:rule_of_6_indoorsRule of 6 indoors -10054.8     2906.4  -3.459 0.000546 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7710 on 4782 degrees of freedom
## Multiple R-squared:  0.3696, Adjusted R-squared:  0.3661 
## F-statistic: 107.8 on 26 and 4782 DF,  p-value: < 2.2e-16
m.bike.withmonth.wfh.6indoors.iter <- lm(Hires ~  eat_out_to_help_out + month * wfh + month * rule_of_6_indoors + wfh * rule_of_6_indoors, data = bike)
summary(m.bike.withmonth.wfh.6indoors.iter) # The interaction effect of month to rule of 6 indoors policy is not statistically significant.
## 
## Call:
## lm(formula = Hires ~ eat_out_to_help_out + month * wfh + month * 
##     rule_of_6_indoors + wfh * rule_of_6_indoors, data = bike)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -29298  -5128    468   5592  38232 
## 
## Coefficients: (7 not defined because of singularities)
##                                                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                                           18731.7      437.9  42.779  < 2e-16 ***
## eat_out_to_help_outEat out to help out                 5434.4     1515.1   3.587 0.000338 ***
## monthFeb                                               1061.9      633.8   1.675 0.093924 .  
## monthMar                                               3590.0      627.1   5.725 1.10e-08 ***
## monthApr                                               7273.4      641.8  11.333  < 2e-16 ***
## monthMay                                              11071.7      636.2  17.402  < 2e-16 ***
## monthJun                                              13316.8      641.8  20.750  < 2e-16 ***
## monthJul                                              16130.5      627.6  25.701  < 2e-16 ***
## monthAug                                              12252.3      603.8  20.294  < 2e-16 ***
## monthSep                                              12139.2      604.2  20.092  < 2e-16 ***
## monthOct                                               8943.7      605.0  14.783  < 2e-16 ***
## monthNov                                               4328.2      609.8   7.098 1.45e-12 ***
## monthDec                                              -1594.2      613.4  -2.599 0.009387 ** 
## wfhWork from home                                      -134.6      911.5  -0.148 0.882573    
## rule_of_6_indoorsRule of 6 indoors                     6591.8     4498.8   1.465 0.142919    
## monthFeb:wfhWork from home                             2653.8     1322.3   2.007 0.044810 *  
## monthMar:wfhWork from home                             2111.5     1258.0   1.678 0.093339 .  
## monthApr:wfhWork from home                              904.8     1243.5   0.728 0.466860    
## monthMay:wfhWork from home                             3194.4     1260.6   2.534 0.011308 *  
## monthJun:wfhWork from home                             5013.6     1308.2   3.832 0.000129 ***
## monthJul:wfhWork from home                              697.7     1293.1   0.540 0.589512    
## monthAug:wfhWork from home                             2025.9     1400.8   1.446 0.148171    
## monthSep:wfhWork from home                            -2321.4     1424.4  -1.630 0.103224    
## monthOct:wfhWork from home                              312.6     1515.7   0.206 0.836594    
## monthNov:wfhWork from home                             1838.1     1414.8   1.299 0.193930    
## monthDec:wfhWork from home                              529.6     1322.6   0.400 0.688837    
## monthFeb:rule_of_6_indoorsRule of 6 indoors                NA         NA      NA       NA    
## monthMar:rule_of_6_indoorsRule of 6 indoors                NA         NA      NA       NA    
## monthApr:rule_of_6_indoorsRule of 6 indoors                NA         NA      NA       NA    
## monthMay:rule_of_6_indoorsRule of 6 indoors             286.0     3084.6   0.093 0.926131    
## monthJun:rule_of_6_indoorsRule of 6 indoors            3421.0     2765.6   1.237 0.216151    
## monthJul:rule_of_6_indoorsRule of 6 indoors            4138.1     2991.3   1.383 0.166620    
## monthAug:rule_of_6_indoorsRule of 6 indoors                NA         NA      NA       NA    
## monthSep:rule_of_6_indoorsRule of 6 indoors            5173.9     3554.7   1.456 0.145594    
## monthOct:rule_of_6_indoorsRule of 6 indoors                NA         NA      NA       NA    
## monthNov:rule_of_6_indoorsRule of 6 indoors                NA         NA      NA       NA    
## monthDec:rule_of_6_indoorsRule of 6 indoors                NA         NA      NA       NA    
## wfhWork from home:rule_of_6_indoorsRule of 6 indoors  -7485.8     3902.8  -1.918 0.055163 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7710 on 4778 degrees of freedom
## Multiple R-squared:  0.3701, Adjusted R-squared:  0.3662 
## F-statistic: 93.59 on 30 and 4778 DF,  p-value: < 2.2e-16
m.bike.withmonth.wfh.6indoors.eat.iter <- lm(Hires ~  eat_out_to_help_out + month * wfh + month * rule_of_6_indoors + month * eat_out_to_help_out  + wfh * rule_of_6_indoors, data = bike)
summary(m.bike.withmonth.wfh.6indoors.eat.iter) # There was no prediction power for interaction effect between month and eat out policy.
## 
## Call:
## lm(formula = Hires ~ eat_out_to_help_out + month * wfh + month * 
##     rule_of_6_indoors + month * eat_out_to_help_out + wfh * rule_of_6_indoors, 
##     data = bike)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -29298  -5128    468   5592  38232 
## 
## Coefficients: (18 not defined because of singularities)
##                                                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                                           18731.7      437.9  42.779  < 2e-16 ***
## eat_out_to_help_outEat out to help out                 5434.4     1515.1   3.587 0.000338 ***
## monthFeb                                               1061.9      633.8   1.675 0.093924 .  
## monthMar                                               3590.0      627.1   5.725 1.10e-08 ***
## monthApr                                               7273.4      641.8  11.333  < 2e-16 ***
## monthMay                                              11071.7      636.2  17.402  < 2e-16 ***
## monthJun                                              13316.8      641.8  20.750  < 2e-16 ***
## monthJul                                              16130.5      627.6  25.701  < 2e-16 ***
## monthAug                                              12252.3      603.8  20.294  < 2e-16 ***
## monthSep                                              12139.2      604.2  20.092  < 2e-16 ***
## monthOct                                               8943.7      605.0  14.783  < 2e-16 ***
## monthNov                                               4328.2      609.8   7.098 1.45e-12 ***
## monthDec                                              -1594.2      613.4  -2.599 0.009387 ** 
## wfhWork from home                                      -134.6      911.5  -0.148 0.882573    
## rule_of_6_indoorsRule of 6 indoors                     6591.8     4498.8   1.465 0.142919    
## monthFeb:wfhWork from home                             2653.8     1322.3   2.007 0.044810 *  
## monthMar:wfhWork from home                             2111.5     1258.0   1.678 0.093339 .  
## monthApr:wfhWork from home                              904.8     1243.5   0.728 0.466860    
## monthMay:wfhWork from home                             3194.4     1260.6   2.534 0.011308 *  
## monthJun:wfhWork from home                             5013.6     1308.2   3.832 0.000129 ***
## monthJul:wfhWork from home                              697.7     1293.1   0.540 0.589512    
## monthAug:wfhWork from home                             2025.9     1400.8   1.446 0.148171    
## monthSep:wfhWork from home                            -2321.4     1424.4  -1.630 0.103224    
## monthOct:wfhWork from home                              312.6     1515.7   0.206 0.836594    
## monthNov:wfhWork from home                             1838.1     1414.8   1.299 0.193930    
## monthDec:wfhWork from home                              529.6     1322.6   0.400 0.688837    
## monthFeb:rule_of_6_indoorsRule of 6 indoors                NA         NA      NA       NA    
## monthMar:rule_of_6_indoorsRule of 6 indoors                NA         NA      NA       NA    
## monthApr:rule_of_6_indoorsRule of 6 indoors                NA         NA      NA       NA    
## monthMay:rule_of_6_indoorsRule of 6 indoors             286.0     3084.6   0.093 0.926131    
## monthJun:rule_of_6_indoorsRule of 6 indoors            3421.0     2765.6   1.237 0.216151    
## monthJul:rule_of_6_indoorsRule of 6 indoors            4138.1     2991.3   1.383 0.166620    
## monthAug:rule_of_6_indoorsRule of 6 indoors                NA         NA      NA       NA    
## monthSep:rule_of_6_indoorsRule of 6 indoors            5173.9     3554.7   1.456 0.145594    
## monthOct:rule_of_6_indoorsRule of 6 indoors                NA         NA      NA       NA    
## monthNov:rule_of_6_indoorsRule of 6 indoors                NA         NA      NA       NA    
## monthDec:rule_of_6_indoorsRule of 6 indoors                NA         NA      NA       NA    
## eat_out_to_help_outEat out to help out:monthFeb            NA         NA      NA       NA    
## eat_out_to_help_outEat out to help out:monthMar            NA         NA      NA       NA    
## eat_out_to_help_outEat out to help out:monthApr            NA         NA      NA       NA    
## eat_out_to_help_outEat out to help out:monthMay            NA         NA      NA       NA    
## eat_out_to_help_outEat out to help out:monthJun            NA         NA      NA       NA    
## eat_out_to_help_outEat out to help out:monthJul            NA         NA      NA       NA    
## eat_out_to_help_outEat out to help out:monthAug            NA         NA      NA       NA    
## eat_out_to_help_outEat out to help out:monthSep            NA         NA      NA       NA    
## eat_out_to_help_outEat out to help out:monthOct            NA         NA      NA       NA    
## eat_out_to_help_outEat out to help out:monthNov            NA         NA      NA       NA    
## eat_out_to_help_outEat out to help out:monthDec            NA         NA      NA       NA    
## wfhWork from home:rule_of_6_indoorsRule of 6 indoors  -7485.8     3902.8  -1.918 0.055163 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7710 on 4778 degrees of freedom
## Multiple R-squared:  0.3701, Adjusted R-squared:  0.3662 
## F-statistic: 93.59 on 30 and 4778 DF,  p-value: < 2.2e-16
# Using ANOVA to compare the improvement of models when controlling day
anova(m.bike.eat.iter, m.bike.withmonth, m.bike.withmonth.wfh.iter, m.bike.withmonth.wfh.6indoors.iter, m.bike.withmonth.wfh.6indoors.eat.iter)
## Analysis of Variance Table
## 
## Model 1: Hires ~ eat_out_to_help_out + wfh * rule_of_6_indoors
## Model 2: Hires ~ eat_out_to_help_out + month + wfh * rule_of_6_indoors
## Model 3: Hires ~ eat_out_to_help_out + month * wfh + wfh * rule_of_6_indoors
## Model 4: Hires ~ eat_out_to_help_out + month * wfh + month * rule_of_6_indoors + 
##     wfh * rule_of_6_indoors
## Model 5: Hires ~ eat_out_to_help_out + month * wfh + month * rule_of_6_indoors + 
##     month * eat_out_to_help_out + wfh * rule_of_6_indoors
##   Res.Df        RSS Df  Sum of Sq        F    Pr(>F)    
## 1   4804 4.3816e+11                                     
## 2   4793 2.8682e+11 11 1.5134e+11 231.4711 < 2.2e-16 ***
## 3   4782 2.8424e+11 11 2.5771e+09   3.9417 1.001e-05 ***
## 4   4778 2.8399e+11  4 2.5331e+08   1.0654    0.3719    
## 5   4778 2.8399e+11  0 0.0000e+00                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

The ANOVA analysis show that, by controlling month, the fit of model is significantly improved \(F(11,4793)=231.5, p < .0001\) and continue significantly improved when adding more interaction effect of month on work from home policy \(F(6,4782)=3.9, p < .0001\). On the contrary, adding more interaction effect of month to rule of 6 indoors is not significantly improve the model fit \(F(6,4778)=1.1, p = 0.37\).

color_4 <- c( "#d94801", "#7bccc4", "#00441b", "#4eb3d3", "#0868ac", "#8c6bb1","#fc4e2a" )

# Plot the interaction effect of day and month
m.bike.withdaymonth <- lm(Hires ~ day*month, data = bike)
m.bike.withdaymonth.emm <- emmeans(m.bike.withdaymonth, ~day*month)

ggplot(summary(m.bike.withdaymonth.emm), aes(x=month, y=emmean, ymin=lower.CL, ymax=upper.CL, colour=day, group=day, alpha=0.5))+ 
  scale_colour_manual(values = color_4)+
  geom_point(show.legend = FALSE) + 
  geom_linerange(show.legend = FALSE) + 
  labs(x="Month", y="Predicted Bike Hires", colour="Day", title = "Number of bike hires prediction with the effect of month and day") + 
  geom_line()+
  guides(alpha = "none")
Figure 1.6 Number of bike hires prediction with the effect of month and day

Figure 1.6 Number of bike hires prediction with the effect of month and day

The graph show that the slope from low season to high season on weekends is higher than the slop of weekdays which mean day and month have different interaction effect on weekends and weekdays. Thus, we should taking interaction effect of day and month into account.

# Create another regression model by adding both day and month as predictor
m.bike.withdaymonth.wfh.iter <- lm(Hires ~  eat_out_to_help_out + day * wfh + month * wfh + wfh * rule_of_6_indoors, data = bike)
summary(m.bike.withdaymonth.wfh.iter)
## 
## Call:
## lm(formula = Hires ~ eat_out_to_help_out + day * wfh + month * 
##     wfh + wfh * rule_of_6_indoors, data = bike)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -28935  -4639    501   4788  36508 
## 
## Coefficients:
##                                                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                                           13950.3      509.4  27.384  < 2e-16 ***
## eat_out_to_help_outEat out to help out                 5434.8     1438.9   3.777 0.000161 ***
## dayMon                                                 4892.5      449.0  10.897  < 2e-16 ***
## dayTue                                                 6933.7      449.4  15.430  < 2e-16 ***
## dayWed                                                 7141.7      449.4  15.893  < 2e-16 ***
## dayThu                                                 7074.4      449.4  15.743  < 2e-16 ***
## dayFri                                                 5710.4      449.1  12.714  < 2e-16 ***
## daySat                                                 1487.6      448.9   3.314 0.000928 ***
## wfhWork from home                                      3212.1     1059.0   3.033 0.002434 ** 
## monthFeb                                               1097.3      602.0   1.823 0.068393 .  
## monthMar                                               3654.8      595.5   6.137 9.09e-10 ***
## monthApr                                               7335.3      609.5  12.035  < 2e-16 ***
## monthMay                                              11039.9      604.2  18.271  < 2e-16 ***
## monthJun                                              13387.3      609.5  21.964  < 2e-16 ***
## monthJul                                              16171.2      596.1  27.130  < 2e-16 ***
## monthAug                                              12284.7      573.4  21.425  < 2e-16 ***
## monthSep                                              12185.8      573.8  21.238  < 2e-16 ***
## monthOct                                               8986.1      574.6  15.639  < 2e-16 ***
## monthNov                                               4337.1      579.1   7.489 8.21e-14 ***
## monthDec                                              -1522.3      582.6  -2.613 0.009006 ** 
## rule_of_6_indoorsRule of 6 indoors                    11733.8     2618.9   4.480 7.62e-06 ***
## dayMon:wfhWork from home                              -5516.0      946.5  -5.828 5.98e-09 ***
## dayTue:wfhWork from home                              -4908.4      944.7  -5.196 2.13e-07 ***
## dayWed:wfhWork from home                              -5405.7      944.7  -5.722 1.12e-08 ***
## dayThu:wfhWork from home                              -4951.1      944.7  -5.241 1.67e-07 ***
## dayFri:wfhWork from home                              -4402.5      944.4  -4.662 3.22e-06 ***
## daySat:wfhWork from home                               2263.2      946.4   2.391 0.016824 *  
## wfhWork from home:monthFeb                             2578.9     1255.9   2.053 0.040089 *  
## wfhWork from home:monthMar                             1993.5     1195.2   1.668 0.095407 .  
## wfhWork from home:monthApr                              767.5     1181.2   0.650 0.515881    
## wfhWork from home:monthMay                             2947.6     1176.9   2.505 0.012293 *  
## wfhWork from home:monthJun                             5104.9     1201.2   4.250 2.18e-05 ***
## wfhWork from home:monthJul                              855.5     1197.9   0.714 0.475141    
## wfhWork from home:monthAug                             1951.1     1330.8   1.466 0.142676    
## wfhWork from home:monthSep                            -2099.9     1311.8  -1.601 0.109487    
## wfhWork from home:monthOct                             -462.4     1349.9  -0.343 0.731974    
## wfhWork from home:monthNov                             1835.6     1343.7   1.366 0.171980    
## wfhWork from home:monthDec                              376.0     1256.5   0.299 0.764787    
## wfhWork from home:rule_of_6_indoorsRule of 6 indoors -10034.0     2760.4  -3.635 0.000281 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7322 on 4770 degrees of freedom
## Multiple R-squared:  0.4329, Adjusted R-squared:  0.4283 
## F-statistic: 95.81 on 38 and 4770 DF,  p-value: < 2.2e-16
# Create another regression model by include interaction effect between day and month
m.bike.withdaymonth.wfh.iter2 <- lm(Hires ~  eat_out_to_help_out + day * wfh + month * wfh + day * month + wfh * rule_of_6_indoors, data = bike)
summary(m.bike.withdaymonth.wfh.iter2)
## 
## Call:
## lm(formula = Hires ~ eat_out_to_help_out + day * wfh + month * 
##     wfh + day * month + wfh * rule_of_6_indoors, data = bike)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -27873  -4578    572   4773  37663 
## 
## Coefficients:
##                                                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                                           12272.93     988.12  12.420  < 2e-16 ***
## eat_out_to_help_outEat out to help out                 5429.75    1434.90   3.784 0.000156 ***
## dayMon                                                 8340.58    1370.74   6.085 1.26e-09 ***
## dayTue                                                 9094.99    1369.49   6.641 3.47e-11 ***
## dayWed                                                 9087.26    1374.60   6.611 4.25e-11 ***
## dayThu                                                 8905.87    1374.61   6.479 1.02e-10 ***
## dayFri                                                 8118.17    1375.61   5.902 3.85e-09 ***
## daySat                                                 1373.34    1376.95   0.997 0.318633    
## wfhWork from home                                      3080.33    1061.30   2.902 0.003720 ** 
## monthFeb                                               1100.45    1421.62   0.774 0.438921    
## monthMar                                               3845.16    1388.48   2.769 0.005639 ** 
## monthApr                                              10828.10    1401.47   7.726 1.34e-14 ***
## monthMay                                              15939.50    1392.81  11.444  < 2e-16 ***
## monthJun                                              15643.26    1405.77  11.128  < 2e-16 ***
## monthJul                                              18022.59    1382.36  13.038  < 2e-16 ***
## monthAug                                              16053.47    1363.88  11.770  < 2e-16 ***
## monthSep                                              14118.08    1371.30  10.295  < 2e-16 ***
## monthOct                                               9666.92    1378.06   7.015 2.63e-12 ***
## monthNov                                               4284.11    1397.23   3.066 0.002181 ** 
## monthDec                                               -210.10    1383.66  -0.152 0.879315    
## rule_of_6_indoorsRule of 6 indoors                    11848.09    2613.86   4.533 5.97e-06 ***
## dayMon:wfhWork from home                              -5322.30     955.52  -5.570 2.69e-08 ***
## dayTue:wfhWork from home                              -4648.59     953.46  -4.876 1.12e-06 ***
## dayWed:wfhWork from home                              -5164.00     953.59  -5.415 6.42e-08 ***
## dayThu:wfhWork from home                              -4706.96     953.82  -4.935 8.30e-07 ***
## dayFri:wfhWork from home                              -4106.85     953.58  -4.307 1.69e-05 ***
## daySat:wfhWork from home                               2340.71     955.98   2.448 0.014382 *  
## wfhWork from home:monthFeb                             2523.83    1252.81   2.015 0.044010 *  
## wfhWork from home:monthMar                             1889.06    1192.67   1.584 0.113286    
## wfhWork from home:monthApr                              709.96    1178.50   0.602 0.546919    
## wfhWork from home:monthMay                             2824.40    1174.41   2.405 0.016213 *  
## wfhWork from home:monthJun                             5078.08    1198.44   4.237 2.31e-05 ***
## wfhWork from home:monthJul                              799.14    1195.32   0.669 0.503811    
## wfhWork from home:monthAug                             1966.60    1327.84   1.481 0.138660    
## wfhWork from home:monthSep                            -2139.05    1308.93  -1.634 0.102285    
## wfhWork from home:monthOct                             -489.53    1346.82  -0.363 0.716271    
## wfhWork from home:monthNov                             1792.17    1340.57   1.337 0.181331    
## wfhWork from home:monthDec                              298.63    1253.76   0.238 0.811744    
## dayMon:monthFeb                                        -866.86    1963.24  -0.442 0.658839    
## dayTue:monthFeb                                         426.19    1968.26   0.217 0.828584    
## dayWed:monthFeb                                         378.26    1967.65   0.192 0.847563    
## dayThu:monthFeb                                         203.72    1972.57   0.103 0.917747    
## dayFri:monthFeb                                        -541.45    1972.31  -0.275 0.783691    
## daySat:monthFeb                                         434.19    1967.34   0.221 0.825335    
## dayMon:monthMar                                        -840.91    1926.32  -0.437 0.662467    
## dayTue:monthMar                                         107.15    1918.63   0.056 0.955467    
## dayWed:monthMar                                        -453.04    1923.23  -0.236 0.813782    
## dayThu:monthMar                                        -207.71    1918.59  -0.108 0.913794    
## dayFri:monthMar                                       -1270.58    1922.05  -0.661 0.508610    
## daySat:monthMar                                        1482.82    1926.01   0.770 0.441401    
## dayMon:monthApr                                       -6221.50    1931.07  -3.222 0.001283 ** 
## dayTue:monthApr                                       -4119.85    1935.95  -2.128 0.033383 *  
## dayWed:monthApr                                       -4277.50    1940.87  -2.204 0.027579 *  
## dayThu:monthApr                                       -4265.19    1941.38  -2.197 0.028070 *  
## dayFri:monthApr                                       -4876.19    1936.10  -2.519 0.011816 *  
## daySat:monthApr                                        -665.13    1931.22  -0.344 0.730554    
## dayMon:monthMay                                       -7764.28    1915.00  -4.054 5.11e-05 ***
## dayTue:monthMay                                       -7160.99    1910.99  -3.747 0.000181 ***
## dayWed:monthMay                                       -6483.72    1919.62  -3.378 0.000737 ***
## dayThu:monthMay                                       -5916.27    1923.75  -3.075 0.002114 ** 
## dayFri:monthMay                                       -5197.30    1923.29  -2.702 0.006911 ** 
## daySat:monthMay                                       -1480.04    1927.48  -0.768 0.442607    
## dayMon:monthJun                                       -4836.11    1940.18  -2.493 0.012715 *  
## dayTue:monthJun                                       -2961.29    1941.14  -1.526 0.127191    
## dayWed:monthJun                                       -2369.61    1941.15  -1.221 0.222251    
## dayThu:monthJun                                       -3035.23    1936.67  -1.567 0.117124    
## dayFri:monthJun                                       -3494.01    1940.07  -1.801 0.071771 .  
## daySat:monthJun                                         902.63    1939.47   0.465 0.641665    
## dayMon:monthJul                                       -3503.48    1909.69  -1.835 0.066631 .  
## dayTue:monthJul                                       -2734.58    1914.22  -1.429 0.153198    
## dayWed:monthJul                                       -1997.64    1918.71  -1.041 0.297866    
## dayThu:monthJul                                        -872.11    1923.24  -0.453 0.650241    
## dayFri:monthJul                                       -3644.11    1910.14  -1.908 0.056482 .  
## daySat:monthJul                                         -85.83    1909.82  -0.045 0.964156    
## dayMon:monthAug                                       -7289.96    1885.33  -3.867 0.000112 ***
## dayTue:monthAug                                       -4783.13    1884.94  -2.538 0.011195 *  
## dayWed:monthAug                                       -4625.75    1889.18  -2.449 0.014379 *  
## dayThu:monthAug                                       -4950.65    1892.84  -2.615 0.008939 ** 
## dayFri:monthAug                                       -4656.81    1896.93  -2.455 0.014128 *  
## daySat:monthAug                                          22.04    1897.29   0.012 0.990730    
## dayMon:monthSep                                       -4586.88    1908.48  -2.403 0.016281 *  
## dayTue:monthSep                                       -2728.60    1907.70  -1.430 0.152695    
## dayWed:monthSep                                       -2314.53    1907.97  -1.213 0.225158    
## dayThu:monthSep                                       -2059.01    1904.11  -1.081 0.279597    
## dayFri:monthSep                                       -3012.16    1904.22  -1.582 0.113755    
## daySat:monthSep                                        1206.40    1912.25   0.631 0.528152    
## dayMon:monthOct                                       -2302.96    1915.47  -1.202 0.229309    
## dayTue:monthOct                                        -356.12    1919.82  -0.185 0.852847    
## dayWed:monthOct                                       -1027.39    1923.94  -0.534 0.593365    
## dayThu:monthOct                                        -366.82    1923.67  -0.191 0.848780    
## dayFri:monthOct                                        -883.64    1923.71  -0.459 0.646009    
## daySat:monthOct                                         252.97    1915.46   0.132 0.894936    
## dayMon:monthNov                                       -1090.07    1936.71  -0.563 0.573568    
## dayTue:monthNov                                        -251.26    1932.27  -0.130 0.896544    
## dayWed:monthNov                                         522.45    1940.69   0.269 0.787780    
## dayThu:monthNov                                        1031.94    1945.45   0.530 0.595835    
## dayFri:monthNov                                         328.01    1941.16   0.169 0.865821    
## daySat:monthNov                                        -170.85    1945.81  -0.088 0.930036    
## dayMon:monthDec                                       -2054.12    1922.89  -1.068 0.285467    
## dayTue:monthDec                                       -1606.83    1922.93  -0.836 0.403414    
## dayWed:monthDec                                        -922.47    1927.25  -0.479 0.632213    
## dayThu:monthDec                                       -1742.66    1919.16  -0.908 0.363907    
## dayFri:monthDec                                       -2058.09    1922.88  -1.070 0.284531    
## daySat:monthDec                                        -688.06    1922.96  -0.358 0.720501    
## wfhWork from home:rule_of_6_indoorsRule of 6 indoors -10156.43    2755.13  -3.686 0.000230 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7301 on 4704 degrees of freedom
## Multiple R-squared:  0.4438, Adjusted R-squared:  0.4315 
## F-statistic: 36.09 on 104 and 4704 DF,  p-value: < 2.2e-16
# Using ANOVA to compare the improvement between model

anova(m.bike.eat.iter, m.bike.withdaymonth.wfh.iter, m.bike.withdaymonth.wfh.iter2)
## Analysis of Variance Table
## 
## Model 1: Hires ~ eat_out_to_help_out + wfh * rule_of_6_indoors
## Model 2: Hires ~ eat_out_to_help_out + day * wfh + month * wfh + wfh * 
##     rule_of_6_indoors
## Model 3: Hires ~ eat_out_to_help_out + day * wfh + month * wfh + day * 
##     month + wfh * rule_of_6_indoors
##   Res.Df        RSS Df  Sum of Sq        F  Pr(>F)    
## 1   4804 4.3816e+11                                   
## 2   4770 2.5571e+11 34 1.8245e+11 100.6570 < 2e-16 ***
## 3   4704 2.5077e+11 66 4.9355e+09   1.4027 0.01799 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

The ANOVA analysis show that using both day and month as control variables will significantly improve the fit of model, by adding the main effect of day and month and interaction effect of day and month with work from home policy to the model, it will significantly improve the fit \(F(34,4770)=100.7, p < 0.0001\). Moreover, by adding more interaction effect between day and month itself, the fit will significantly improve \(F(66,4704)=1.4, p = 0.018\).

In summary, as mentioned in the analysis above, controlling the effect of day, month and interaction effect of day and month is appropriate to increase the prediction power of the model when control interaction effect with work from home, but there was no significant improvement when controlling effect of day and month interaction with rule of 6 indoors policy and eat out to eat out policy. While year should not be use as control variable because year have multicollinearity with work from home policy and should not be use as independent variable together.